*********************************************************************
*** Figure 1: Physician Earnings over the Lifecyle and by Firm Size

	import delimited "${mypath}/intermediate_csv/desc03-lifecycle_earnings_binscatter.csv", clear asdoub delim(tab)
	destring received_1099ssa, replace i(";")
	
	* Scale variables
	replace ptotinc = ptotinc/1000
	replace income_acs = income_acs/1000
	replace totw2comp = totw2comp/1000
	replace pftloss = pftloss/1000
	replace profinc = profinc/1000 
	
	* Total income 
	#d ;
	tw 	(connected ptotinc agebin if restriction == "All ages; year=2017", lcolor(black) mcolor(black)),  
		xlabel(20(10)70,labsize(medlarge) nogrid) 
		ylabel(,labsize(medlarge)) 
		xtitle("Age", size(medlarge))
		ytitle("") 
		xmtick(##2) 
		ymtick(##2)
		title("Annual Income" "($1,000)", size(medlarge) placement(9) span justification(left) col(black))
		;
	#d cr
	
	graph export "${individ_figs}/desc03-lifecycle_ptotinc_2017.eps", replace
	
	* Administrative vs. Survey Data
	
	* set label position
	su ptotinc if restriction == "All ages; year=2017; observed in ACS"
		loc max = ceil(`r(max)'/100)*100
	su ptotinc if agebin == 50 & restriction == "All ages; year=2017"
		loc ptotinc_placem = `r(mean)' - ceil(`r(mean)'/25)
	su income_acs if agebin == 50 & restriction == "All ages; year=2017"
		loc inc_allse_placem = `r(mean)' - ceil(`r(mean)'/7)
	
	#d ;
	tw 	(connected ptotinc agebin if restriction == "All ages; year=2017; observed in ACS", lcolor(black) mcolor(black))
		(connected income_acs agebin if restriction == "All ages; year=2017; observed in ACS", lcolor(eltblue) mcolor(eltblue) lpattern(dash) msymbol(D)),
		xlabel(,labsize(medlarge) nogrid) 
		ylabel(,labsize(medlarge)) 
		xtitle("Age", size(medlarge))
		ytitle("") 
		xmtick(##2) 
		ymtick(##2) 
		xscale(range(20 70))
		ylab(,labsize(medium))
		text(`ptotinc_placem' 50 "{bf:Tax Data}", color(black) placement(c))
			text(`inc_allse_placem' 50 "{bf:Restricted-use}" "{bf:ACS}", color(eltblue) placement(c))
		legend(off)
		title("Annual Income" "($1,000)", size(medlarge) placement(9) span justification(left) col(black))
		;
	#d cr

	graph export "${individ_figs}/desc03-lifecycle_ptotinc_vs_income_acs.eps", replace	
	
	* Contribution of Business Income
	
	* set label position
	su ptotinc if agebin == 50 & restriction == "All ages; year=2017"
		loc ptotinc_placem = `r(mean)' + ceil(`r(mean)'/15)
	su totw2comp if agebin == 50 & restriction == "All ages; year=2017"
		loc total_compensation_placem = `r(mean)' - ceil(`r(mean)'/8)
	su profinc if agebin == 50 & restriction == "All ages; year=2017"
		loc profinc_placem = `r(mean)' - ceil(`r(mean)'/10)
	
	#d ;
	tw 	(connected ptotinc agebin if restriction == "All ages; year=2017", lcolor(black) mcolor(black))
		(connected totw2comp agebin if restriction == "All ages; year=2017", lcolor(eltblue) mcolor(eltblue) lpattern(dash) msymbol(D))
		(connected profinc agebin if restriction == "All ages; year=2017", lcolor("8 48 107") mcolor("8 48 107") lpattern(shortdash) msymbol(T)), 
		xlabel(20(10)70,labsize(medlarge) nogrid)
		ylabel(,labsize(medlarge))
		xtitle("Age", size(medlarge)) xmtick(##2) 
		ytitle("") ymtick(##2) leg(off)
		text(`ptotinc_placem' 50 "{bf:Individual}" "{bf:Total Income}", color(black) placement(c))
		text(`total_compensation_placem' 50 "{bf:W-2 Wages}" "{bf: Only}", color(eltblue) placement(c))
		text(`profinc_placem' 50 "{bf:W-2 Wages +}" "{bf:Business Income}", color("8 48 107") placement(c))
		title("Annual Income" "($1,000)", size(medlarge) placement(9) span justification(left) col(black))
		;
	
	graph export "${individ_figs}/desc03-lifecycle_ptotinc_vs_w2wgs.eps", replace;
	#d cr	
	
	* Income vs. Firm size
	import delimited using "${mypath}/intermediate_csv/desc08-binscatter_ptotinc_by_einsize.csv", clear asdoub
	
	loc median = median[1]
	loc tmedian = string(round(`median', 1),"%2.0f")
	loc mean = mean[1]
	loc sh_einsize1 = sh_einsize1[1]
	
	# d;
	tw (connected ptotinc einsize, lcolor(black) mcolor(black)),  
		ylab(, nogrid labsize(medlarge)) xlab(0(5)100, labsize(medium))
		ytitle("")  
		xtitle("Firm Size", size(medlarge)) xline(`median', lc(g8) noextend)
		text(470 `median' "Median = `tmedian'", placement(top) color(g8) 
		size(medium) justification(left) box bcolor(white)) 
		title("Annual Income" "($1,000)" , size(medlarge) placement(9) span justification(left) col(black))
	;
	#d cr		
	graph export "${individ_figs}/desc08-income_vs_einsize_under100_age40to55_panel.eps", replace

***********************************************************
*** Figure 2: Correlates of Speciality Income

	import delimited "${mypath}/intermediate_csv/desc05-specialty_graphs.csv", clear asdoub
	keep if sample == "specialty_category; age4055" 
	drop if sample == "lawyer; age4055" 

	replace specialty_markers = subinstr(specialty_markers,"Physician/","",.)
	replace specialty_markers = "Radiology" if specialty_markers == "Diagnostic Radiology"
	replace specialty_markers = "Pediatric" if specialty_markers == "Pediatric Medicine"
	
	* positions for specialty labels
	replace pos = 8		if specialty_markers == "Dermatology" 
	replace pos = 6		if specialty_markers == "Neurosurgery"
	replace pos = 12 	if specialty_markers == "Radiology"
	replace pos = 12 	if specialty_markers == "Ophthalmology"
	replace pos = 12 	if specialty_markers == "Anesthesiology"
	replace pos = 4 	if specialty_markers == "Family Medicine"
	replace pos = 6 	if specialty_markers == "Cardiac Surgery"
	replace pos = 2		if specialty_markers == "Pediatrics"
	replace pos = 1 	if specialty_markers == "Internal Medicine"
	replace pos = 9		if specialty_markers == "Orthopedic Surgery"
	replace pos = 6		if specialty_markers == "Lawyer" 
			
	replace specialty_markers = "" if pos == . 
			
	* Income vs. hours 
	reg ptotinc wkh [aw = freq]
	loc beta: di %04.2fc = e(b)[1,1]
	loc r2: di %04.2fc = e(r2)
	
	#d ;
	tw	(scatter ptotinc wkh [aw = freq], msymbol(circle_hollow) mcolor("8 48 107"))
		(scatter ptotinc wkh if freq < 95000, mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(1))
		(scatter ptotinc wkh if freq > 95000, mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(3))
		(lfit ptotinc wkh [aw = freq], lcolor(black)),
		xmtick(##2) 
		xlabel(,labsize(medlarge) nogrid) 
		ylabel(,labsize(medlarge) nogrid) 
		xtitle("Average Weekly Work Hours", size(medlarge)) 
		ytitle("")
		text(1000 40 "{&beta} = `beta'", color(black) size(medium) justification(left)  placement(e))
		text(950 40 "R{sup:2} = `r2'", color(black) size(medium) justification(left)  placement(e))
		legend(off)
		graphr(m(r+3.5 t=0))
		title("Annual Income" "($1,000)", size(medlarge) placement(9) span justification(left) col(black));
	#d cr
	graph export "${individ_figs}/desc05-income_vs_hours_cms_specage4055.eps", replace
		
	 * Income vs. training length
	 
	* determine graph position of specialty labels
	replace pos = 9		if specialty_markers == "Dermatology" 
	replace pos = 6		if specialty_markers == "Neurosurgery"
	replace pos = 3 	if specialty_markers == "Radiology"
	replace pos = 12 	if specialty_markers == "Ophthalmology"
	replace pos = 12 	if specialty_markers == "Anesthesiology"
	replace pos = 4 	if specialty_markers == "Family Medicine"
	replace pos = 12 	if specialty_markers == "Cardiac Surgery"
	replace pos = 3 	if specialty_markers == "Pediatrics"
	replace pos = 1 	if specialty_markers == "Internal Medicine"
	replace pos = 12	if specialty_markers == "Orthopedic Surgery"
	
	reg ptotinc resdur_allyrs [aw = freq]
	loc beta: di %04.2fc = e(b)[1,1]
	loc r2: di %04.2fc = e(r2)
	
	#d ;
	tw	(scatter ptotinc resdur_allyrs [aw = freq], msymbol(circle_hollow) mcolor("8 48 107"))
		(scatter ptotinc resdur_allyrs if freq < 95000, mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(2))
		(scatter ptotinc resdur_allyrs if inrange(freq, 95000, 230000) , mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(3))
		(scatter ptotinc resdur_allyrs if freq > 230000, mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(5))
		(lfit ptotinc resdur_allyrs [aw = freq], lcolor(black)),
		xlabel(3.5(1) 7.5 ,nogrid labsize(medlarge)) 
		xscale(range(3.5 7.5))
		ylabel(,labsize(medlarge) nogrid)
		yscale(r(190 1000))
		xtitle("Average Years of Training", size(medlarge)) 
		ytitle("") 
		text(1000 3.5 "{&beta} = `beta'", color(black) size(medium) justification(left)  placement(e))
		text(950 3.5 "R{sup:2} = `r2'", color(black) size(medium) justification(left)  placement(e))
		legend(off)
		title("Annual Income" "($1,000)", size(medlarge) placement(9) span justification(left) col(black));
	#d cr
	graph export "${individ_figs}/desc05-income_vs_resdur_allyrs_cms_specage4055.eps", replace
			
	* Residualized share US graduates and income
	import delimited "${mypath}/intermediate_csv/desc05-specialty_graphs.csv", clear asdoub
	keep if sample == "NRMP; age4055" 
	
	* positions for specialty labels
	replace pos = 6 	if nrmp_spec_desc == "Radiology"
	replace pos = 12 	if nrmp_spec_desc == "Ophthalmology"
	replace pos = 1 	if nrmp_spec_desc == "Anesthesiology"
	replace pos = 11	if nrmp_spec_desc == "Dermatology" 
	replace pos = 6		if nrmp_spec_desc == "Neurosurgery"
	replace pos = 12 	if nrmp_spec_desc == "Family Medicine"
	replace pos = 6 	if nrmp_spec_desc == "Internal Medicine"
	replace pos = 3 	if nrmp_spec_desc == "Pediatrics"
	replace pos = 9		if nrmp_spec_desc == "Surgery"
	replace specialty_markers = nrmp_spec_desc if pos != .
	
	reg pred_share_us_sn pred_ptotinc [aw = freq]
	loc beta: di %06.5fc = e(b)[1,1]
	loc r2: di %04.2fc = e(r2)
	
	#d ;	
	tw	(scatter pred_share_us_sn pred_ptotinc [aw = freq], msymbol(circle_hollow) mcolor("8 48 107"))
		(scatter pred_share_us_sn pred_ptotinc if freq < 200000, mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(1))
		(scatter pred_share_us_sn pred_ptotinc if inrange(freq, 200000, 400000) , mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(3))
		(scatter pred_share_us_sn pred_ptotinc if freq > 400000, mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(5))
		(lfit pred_share_us_sn pred_ptotinc [aw = freq], lcolor(black)),
		xmtick(##2) 
		xlabel(,labsize(medlarge) nogrid) 
		ylabel(,labsize(medlarge) nogrid) 
		xtitle("Annual Income ($1,000)", size(medlarge)) 
		ytitle("")
		text(1 200 "{&beta} = `beta'", color(black) size(medium) justification(left)  placement(e))
		text(0.97 200 "R{sup:2} = `r2'", color(black) size(medium) justification(left)  placement(e))
		legend(off)
		title("Share of U.S." "MD Graduates", size(medlarge) placement(9) span justification(left) col(black));
	#d cr
		
	graph export "${individ_figs}/desc05-us_md_share_vs_income_nrmp_spec40to55.eps", replace
	
	* Income vs. Publications
	import delimited "${mypath}/intermediate_csv/desc05-specialty_graphs.csv", clear asdoub
	keep if sample == "NRMP; age4055"
	
	preserve
		import excel "${raw_data}/NRMP/step1_summary_stats.xls", clear sheet("step1") firstrow
		keep if n_pub_sn_match != . 
		keep specialty n_pub_sn_match
		replace specialty = "Anesthesiology" if specialty == "Anesthesiology "
		ren specialty nrmp_spec_desc  
		tempfile publications
		save `publications'	
	restore
	
	merge 1:1 nrmp_spec_desc using `publications', nogen keep(match)
	
	replace nrmp_spec_desc = "Orthopedic Surgery" if nrmp_spec_desc == "Orthopaedic Surgery"
	
	* positions for specialty labels
	replace pos = 3 	if nrmp_spec_desc == "Radiology"
	replace pos = 12 	if nrmp_spec_desc == "Ophthalmology"
	replace pos = 9 	if nrmp_spec_desc == "Anesthesiology"
	replace pos = 11	if nrmp_spec_desc == "Dermatology" 
	replace pos = 12	if nrmp_spec_desc == "Neurosurgery"
	replace pos = 9 	if nrmp_spec_desc == "Family Medicine"
	replace pos = 4 	if nrmp_spec_desc == "Internal Medicine"
	replace pos = 10 	if nrmp_spec_desc == "Pediatrics"
	replace pos = 10	if nrmp_spec_desc == "Surgery"
	replace pos = 12	if nrmp_spec_desc == "Orthopedic Surgery"
	replace pos = 6 	if nrmp_spec_desc == "Plastic Surgery"
	replace pos = 5		if nrmp_spec_desc == "Neurology"
	replace pos = 10	if nrmp_spec_desc == "Emergency Medicine"
	
	replace specialty_markers = nrmp_spec_desc if pos != .
	
	reg ptotinc n_pub_sn_match [aw = freq]
	loc beta: di %04.2fc = e(b)[1,1]
	loc r2: di %04.2fc = e(r2)
	
	#d ;	
	tw	(scatter ptotinc n_pub_sn_match [aw = freq], msymbol(circle_hollow) mcolor("8 48 107"))
		(scatter ptotinc n_pub_sn_match if freq < 120000, mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(1))
		(scatter ptotinc n_pub_sn_match if inrange(freq, 120000, 400000) , mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(3))
		(scatter ptotinc n_pub_sn_match if freq > 400000, mcolor("8 48 107") mlabel(specialty_markers) mlabc("8 48 107") msymbol(none) mlabv(pos) mlabsize(small) mlabgap(4.5))
		(lfit ptotinc n_pub_sn_match [aw = freq], lcolor(black)),
		xmtick(##2) 
		xlabel(,labsize(medlarge) nogrid) 
		ylabel(,labsize(medlarge) nogrid) 
		xtitle("Applicants' Self-Reported No. of Abstracts, Presentations, Publications", size(medlarge)) 
		ytitle("")
		text(1000 0 "{&beta} = `beta'", color(black) size(medium) justification(left)  placement(e))
		text(950 0 "R{sup:2} = `r2'", color(black) size(medium) justification(left)  placement(e))
		legend(off)
		title("Annual Income" "($1,000)", size(medlarge) placement(9) span justification(left) col(black));
	#d cr
		
	graph export "${individ_figs}/desc05-income_vs_publications_age40to55.eps", replace

*****************************************************************
*** Figures 3/E6: Maps for Geographic Variation in Earnings

	* Combine State and CZ-level data

	* Load CZ to State Crosswalk 
	import excel "${raw_data}/crosswalks/geography/czlma903.xls" , clear sheet(CZLMA903) firstrow
		
	g statefips = substr(CountyFIPSCode,1,2)
	destring statefips, replace
	destring CZ90, gen(cz)
		
	collapse (sum) Population1990, by(cz statefips)
	bys cz: egen czpop = total(Population1990)
	bys statefips: egen statepop1990 = total(Population1990)
			
	g shczstatepop = Population1990/czpop
	gsort cz -shczstatepop 
	ren Population1990 czpop1990

	keep statefips cz czpop1990 statepop1990 shczstatepop
	tempfile crosswalk
	save `crosswalk', replace
	
	* Loop over samples
	import delimited using "${mypath}/intermediate_csv/desc09-maps_BigCZ_level.csv", clear asdoub
	levelsof sample, loc(samples)
	loc nsamples = `r(r)'
	
	foreach i of numl 1(1)`nsamples' {
		loc sample: word `i' of `samples'
		di 	"`sample'"
	
		* CZ level
		import delimited using "${mypath}/intermediate_csv/desc09-maps_BigCZ_level.csv", clear asdoub
		keep if sample == "`sample'"
		ren (cz90 n) (cz occ_n_cz)
		g mean_ptotinc_cz = mean_ptotinc/1000
		
		keep *cz* mean_ptotinc_cz 
		tempfile cz90
		save `cz90', replace
		
		* State level	
		import delimited using "${mypath}/intermediate_csv/desc09-maps_state_level.csv", clear asdoub
		keep if sample == "`sample'"
		ren (stfips n) (statefips occ_n_state) 
		g mean_ptotinc_state = mean_ptotinc/1000
		
		keep statefips occ_n_state mean_ptotinc_state 
		tempfile state
		save `state', replace
		
		* Merge cz and state level
		use `crosswalk', clear
		merge m:1 statefips using `state', keep(3) nogen		
		merge m:1 cz using `cz90', nogen assert(1 3)
	
		* Compute residual mean for overlapping CZs as (1990 population) weighted average between state means	
		sort statefips cz
		
		g mean_ptotinc_imputed = mean_ptotinc_state * shczstatepop
		replace mean_ptotinc_imputed = mean_ptotinc_cz if mean_ptotinc_cz != .

		collapse (mean) mean_ptotinc_imputed, by(cz czname)
		
		g sample = "`sample'"
		tempfile sample`i'
		save `sample`i'', replace
	}
	
	clear
	foreach i of numl 1(1)`nsamples' {
		append using `sample`i''
	}
	

	* Plot CZ-level ptotinc
	format mean_ptotinc_imputed %4.0f
	merge m:1 cz using "${raw_data}/maptile_geographies/cz1990_database.dta", assert(3) nogen 
	
	* Set formatting
	loc no_bins = 5
	colorpalette blues, n(`no_bins') nonumbers nograph 
	loc colors "`r(p)'"
	forval i = 1/`no_bins' {
		loc lw "`lw' none"
	}	
	
	* CZ-level mean ptotpinc for 2017, ages 40-55 for physicians
	* legend data bins are set using clbreaks
	su mean_ptotinc_imputed if  sample == "physicians; year 2017; age 40 to 55", d
	
	#d ;
	spmap mean_ptotinc_imputed using "${raw_data}/maptile_geographies/cz1990_coords.dta" if  sample == "physicians; year 2017; age 40 to 55", 
		id(id) clnumber(`no_bins') clmethod(eq) clbreaks(124 300 320 340 360 380 400 420 440  460 563)
		fcolor(`colors') osize(`lw') legstyle(2) legjunction(–)  
		legend(  pos(4) symxsize(*1) symysize(*0.55) rowgap(0) size(medsmall) 
		subtitle("Average Annual" "Income ($1,000)", size(medsmall))) 
	;
	#d cr

	graph export "${individ_figs}/desc09-map_ptotinc_cz90imputed_physicians.eps", replace 

	* CZ-level mean ptotpinc for 2017, ages 40-55 for lawyers
	su mean_ptotinc_imputed if  sample == "lawyers; year 2017; age 40 to 55", d
	
	#d ;
	spmap mean_ptotinc_imputed using "${raw_data}/maptile_geographies/cz1990_coords.dta" if  sample == "lawyers; year 2017; age 40 to 55", 
		id(id) clnumber(`no_bins') clmethod(eq) clbreaks(57 105 150 195 240 285 330 375 420  630)
		fcolor(`colors') osize(`lw') legstyle(2) legjunction(–)  
		legend(  pos(4) symxsize(*1) symysize(*0.55) rowgap(0) size(medsmall) 
		subtitle("Average Annual" "Income ($1,000)", size(medsmall))) 
	;
	#d cr
	
	graph export "${individ_figs}/desc09-map_ptotinc_cz90imputed_lawyers.eps", replace 
	
	* State level	
	import delimited using "${mypath}/intermediate_csv/desc09-maps_state_level.csv", clear asdoub
	ren stfips statefips
	replace mean_ptotinc = mean_ptotinc/1000
	format mean_ptotinc %4.0f
	
	merge m:1 statefips using "${raw_data}/maptile_geographies/state_database_clean.dta", assert(3) nogen 
	ren _polygonid id 
	
	* Set formatting
	loc no_bins = 5
	colorpalette blues, n(`no_bins') nonumbers nograph
	loc colors "`r(p)'"
	forval i = 1/`no_bins' {
		loc lw "`lw' none"
	}	

	* State-level mean ptotpinc for 2017, ages 40-55 for physicians
	su mean_ptotinc if  sample == "lawyers; year 2017; age 40 to 55", d
	
	#d ;
	spmap mean_ptotinc using "${raw_data}/maptile_geographies/state_coords_clean.dta" if  sample == "physicians; year 2017; age 40 to 55", 
		id(id) clnumber(`no_bins') clmethod(quantile) 
		line(data("${raw_data}/maptile_geographies/state_coords_clean.dta") size(0.001) color(gs11) pattern(dots))
		fcolor(`colors') osize(`lw') legstyle(2) legjunction(–)  
		legend(  pos(4) symxsize(*1) symysize(*0.55) rowgap(0) size(medsmall) 
		subtitle("Average Annual" "Income ($1,000)", size(medsmall))) 
	;
	#d cr
	
	graph export "${individ_figs}/desc09-map_ptotinc_state_physicians.eps", replace 
	
	* State-level mean ptotpinc for 2017, ages 40-55 for lawyers
	#d ;
	spmap mean_ptotinc using "${raw_data}/maptile_geographies/state_coords_clean.dta" if  sample == "lawyers; year 2017; age 40 to 55", 
		id(id) clnumber(`no_bins') clmethod(quantile) 
		line(data("${raw_data}/maptile_geographies/state_coords_clean.dta") size(0.001) color(gs11) pattern(dots))
		fcolor(`colors') osize(`lw') legstyle(2) legjunction(–)  
		legend(  pos(4) symxsize(*1) symysize(*0.55) rowgap(0) size(medsmall) 
		subtitle("Average Annual" "Income ($1,000)", size(medsmall))) 
	;
	#d cr
	
	graph export "${individ_figs}/desc09-map_ptotinc_state_lawyers.eps", replace as(eps)

***********************************************************
*** Table 1: Summary Statistics

	import delimited using "${mypath}/intermediate_csv/desc01-sum_stats.csv", asdoub clear

	g subpanel = "1. Number of Observations" if panel == "C"
	replace subpanel = "2. Demographics" if inlist(depvar,"age","female","married","observed_in_acs","is_foreign")
	replace subpanel = "3. Income" if inlist(depvar,"totw2comp","ptotinc","aginc","with25Kbizinc","top1")
	replace subpanel = "4. Career Choices and Characteristics" if inlist(depvar,"einsize","wkh","retired_1099ssa","observe_mdsch","ranked_school","top5_school")
	replace subpanel = "5. Share in Specialty Category" if panel == "B"

	replace depvar = subinstr(depvar, "spec_category_desc: ","",.) 
	replace depvar = proper(depvar) 
	replace depvar = "Non-U.S.-Born" if depvar == "Alien"		
	replace depvar = "Share Observed in ACS" if depvar == "Observed_In_Acs" 
	replace depvar = "Individual Total Income (2017 \\\$)" if depvar == "Ptotinc" 
	replace depvar = "Individual Total Wage (2017 \\\$)" if depvar == "Totw2Comp" 
	replace depvar = "AGI (2017 \\\$)" if depvar == "Adjgross" 
	replace depvar = "Business Income $>$ \USD 25K" if depvar == "With25Kbizinc" 
	replace depvar = "Households in Top 1\% of AGI" if depvar == "Top1"
	replace depvar = "Firm Size (Number of Physicians)" if depvar == "Einsize"
	replace depvar = "Weekly Working Hours (ACS)" if depvar == "Wkh"
	replace depvar = "Retired (Based on 1099-SSA)" if depvar == "Retired_1099Ssa"
	replace depvar = "Name of Medical School Observed" if depvar == "Observe_Mdsch"
	replace depvar = "Graduated from Ranked Medical School" if depvar == "Ranked_School"
	replace depvar = "Graduated from Top-5 Medical School" if depvar == "Top5_School"
	replace depvar = subinstr(depvar,"Ob/Gyn","OB-GYN",.)
	
	gsort sample -share_sample
	replace depno = _n if panel == "B" & sample == "All"
	ren depno tempdep
	bys depvar: egen depno = min(tempdep)
	sort subpanel depno sample

	replace sample = "All \\ Ages" if sample == "year==2017"
	replace sample = "Ages \\ 40 to 55" if sample == "year==2017 & ages 40-55"
	replace sample = "Ages \\ 56 to 70" if sample == "year==2017 & ages 56-70"
	
	* Format data
	replace share_sample = mean if inlist(depvar,"Female","Non-U.S.-Born","Married","Share Observed in ACS","Business Income $>$ \USD 25K","Households in Top 1\% of AGI") | ///
		inlist(depvar,"Retired (Based on 1099-SSA)","Name of Medical School Observed","Graduated from Ranked Medical School","Graduated from Top-5 Medical School") | ///
		subpanel == "Share in Specialty Category"
		
	foreach v of varl count no_unique_npi n_phys_year mean median stdev share_sample {
						
		if inlist("`v'","count","no_unique_npi","n_phys_year") {
			tostring `v', format("%12.0fc") replace force
		}
		if inlist("`v'","mean","median","stdev")  {
			tostring `v', format("%15.1fc") replace force
			replace `v' = subinstr(`v',".00","",.) if strlen(`v') > 4
			replace `v' = subinstr(`v',".0","",.) if strlen(`v') > 4
		}
		if inlist("`v'","share_sample")  {
			tostring `v', format("%04.2fc") replace force
			replace `v' = subinstr(`v',".00","",.) if strlen(`v') > 4
		}
	}
			
	* Table head
	loc tableno " & "
	loc tabspec "ll"
	loc colno = 4
	foreach s of numl 1(1)4 {
		loc tabspec "`tabspec'r"
		loc tableno "`tableno' & \multicolumn{1}{c}{(`s')}"		
	}
	
	texdoc init "${tables}/desc01-sum_stats.tex", replace
		
	tex \begin{table}[h!] 
	tex \caption{\bf Summary Statistics \label{tab:sumstatsbysample}} 
	tex \begin{center}
	tex \resizebox{0.8\textwidth}{!}{
	tex \begin{tabular}{`tabspec'} 
	tex \addlinespace[-2ex] \midrule \midrule 
	tex  & Years: & \multicolumn{1}{c}{2005-2017} & \multicolumn{3}{c}{2017} \\ \cmidrule(lr){3-3} \cmidrule(lr){4-6} 
	tex `tableno' \\ 
	tex  & Ages: & \multicolumn{1}{c}{All} & \multicolumn{1}{c}{All} & \multicolumn{1}{c}{40 to 55} & \multicolumn{1}{c}{56 to 70} \\ 
	tex \midrule \addlinespace[1.5ex]
	
	* Summary Statistics
	tex	Number of Person-Years  	 & & `=n_phys_year[1]'   &  `=n_phys_year[2]'    & `=n_phys_year[3]'    & `=n_phys_year[4]'   \\
	tex	Number of Unique Individuals  & & `=no_unique_npi[1]' &  `=no_unique_npi[2]'  & `=no_unique_npi[3]'  & `=no_unique_npi[4]' \\ 	

	forvalues k = 5(4)`=_N' {
		
		if "`=subpanel[`k']'" != "`=subpanel[`k'-1]'" {
			tex \addlinespace[1ex] \multicolumn{`=`colno'+2'}{l}{\textbf{`=substr("`=subpanel[`k']'", 4, .)'}}  \\ \cmidrule(lr){1-1} 
		}				
		if inlist("`=depvar[`k']'","Female","Non-U.S.-Born","Married","Share Observed in ACS","Business Income $>$ \USD 25K","Households in Top 1\% of AGI") | ///
		inlist("`=depvar[`k']'","Retired (Based on 1099-SSA)","Name of Medical School Observed","Graduated from Ranked Medical School","Graduated from Top-5 Medical School") | ///
		strpos("`=subpanel[`k']'","Share") > 0 {
			tex	`=depvar[`k']'  & & `=share_sample[`k']' &  `=share_sample[`=`k'+1']'  & `=share_sample[`=`k'+2']'  & `=share_sample[`=`k'+3']' \\ 				
		}
		else {			
			tex `=depvar[`k']' & Mean & `=mean[`k']' &  `=mean[`=`k'+1']'  & `=mean[`=`k'+2']'  & `=mean[`=`k'+3']' \\ 
			tex			       & Median & `=median[`k']' &  `=median[`=`k'+1']'  & `=median[`=`k'+2']'  & `=median[`=`k'+3']' \\ 
			tex			       & Std. Dev. & `=stdev[`k']' &  `=stdev[`=`k'+1']'  & `=stdev[`=`k'+2']'  & `=stdev[`=`k'+3']' \\ \addlinespace[1ex]			
		}
	}	
	tex \hline \hline \addlinespace[-3.5ex]
	tex \end{tabular}
	tex }
	tex \end{center}
	tex \end{table}

	texdoc close

***********************************************************
*** Table 2: Characteristics of Top Earning Physicians
	
	import delimited using "${mypath}/intermediate_csv/desc04-top_earners.csv", asdoub clear varnames(1)

	foreach var of varl mean_ptotinc median_ptotinc cutoff mean_totw2comp mean_aginc mean_pftloss {
		replace `var' = round(`var' / 1000)
		gen str_`var' = string(`var',"%5.0fc") 
	}
		
	foreach var of varl mean_top5_school {
		gen str_`var' = string(`var',"%4.2f") 
	}

	foreach var of varl mean_general_surgery mean_primary_care mean_neurosurgery mean_cardiology mean_family_practice ///
	mean_statenynj mean_stateca mean_statetx mean_statefl mean_stateaz ///
	mean_with25kbizinc mean_received_1099ssa mean_sh_pftloss mean_sh_w2inc mean_sh_notw2inc ///
	median_sh_pftloss mean_female mean_is_foreign mean_married median_sh_w2inc median_sh_notw2inc {
		gen str_`var' = string(round(`var',0.01),"%04.2f") 
		if 	"`var'" == "mean_received_1099ssa" {
			replace str_`var' = string(round(`var', 0.001),"%04.3f") 
		}
	}
		
	foreach var of varl mean_age mean_wkh n mean_einsize {
		gen str_`var' = string(round(`var'),"%9.0fc") 
	}

	replace str_cutoff = "-" if str_cutoff == "."	
		
	keep str_* sample
	ren str_* *
	drop mean_sh_notw2inc mean_sh_w2inc mean_sh_pftloss

	order n mean_ptotinc median_ptotinc cutoff mean_totw2comp mean_aginc mean_pftloss mean_with25kbizinc ///
	median_sh_pftloss median_sh_notw2inc median_sh_w2inc  mean_wkh mean_received_1099ssa mean_einsize ///
	mean_top5_school mean_cardiology mean_neurosurgery mean_general_surgery mean_primary_care mean_family_practice ///
	mean_age  mean_female mean_is_foreign mean_married ///
	mean_statenynj mean_stateca mean_statefl mean_statetx mean_stateaz ///
	sample
	
	* Transpose
	ds 
	loc varlist = "`r(varlist)'"
	sxpose, clear force
	
	gen rownames = ""
 	forval i= 1/`=_N' {
		loc rowname : word `i' of `varlist'
 		replace rownames = "`rowname'" if _n == `i'
 	}
	
	* Label data 	
	g depno = _n
	g panel = "Income and Labor Supply" if depno < 15
	replace panel = "Specialties and MD Training" if inrange(depno,15,20)
	replace panel = "Demographics" if depno > 20
	replace panel = "" if rownames == "n" | rownames == "sample"

	replace rownames = "Individual Total Income (\\\$1,000);Mean" if rownames == "mean_ptotinc"
	replace rownames = "Individual Total Income (\\\$1,000);Median" if rownames == "median_ptotinc"
	replace rownames = "Individual Total Income (\\\$1,000);Cutoff" if rownames == "cutoff"
	
	replace rownames = "Wage Income (\\\$1,000);Mean" if rownames == "mean_totw2comp"
	replace rownames = "AGI (\\\$1,000);Mean" if rownames == "mean_aginc"
	replace rownames = "Business Income (\\\$1,000);Mean" if rownames == "mean_pftloss"
	replace rownames = "Business Income (\\\$1,000);Share $>$ \USD 25K" if rownames == "mean_with25kbizinc"
	
	replace rownames = "Median Share of Income from Business" if rownames == "median_sh_pftloss"
	replace rownames = "Median Share of Income from Non-Labor" if rownames == "median_sh_notw2inc"
	replace rownames = "Median Share of Income from Labor" if rownames == "median_sh_w2inc"
	
	replace rownames = "Mean Weekly Hours Worked" if rownames == "mean_wkh"
	replace rownames = "Retired (Based on 1099-SSA)" if rownames == "mean_received_1099ssa"
	replace rownames = "Mean Firm Size" if rownames == "mean_einsize"
	
	replace rownames = "Graduated from Top-5 MD Program" if rownames == "mean_top5_school"
	replace rownames = "Cardiology Share" if rownames == "mean_cardiology"
	replace rownames = "Neurosurgery Share" if rownames == "mean_neurosurgery"
	replace rownames = "General Surgery Share"  if rownames == "mean_general_surgery"
	replace rownames = "~~~~~~Family Practice Share" if rownames == "mean_family_practice"
	replace rownames = "Primary Care Share" if rownames == "mean_primary_care"
	
	replace rownames = "Share in New York and New Jersey" if rownames == "mean_statenynj"
	replace rownames = "Share in California" if rownames == "mean_stateca"
	replace rownames = "Share in Florida" if rownames == "mean_statefl"
	replace rownames = "Share in Texas" if rownames == "mean_statetx"
	replace rownames = "Share in Arizona" if rownames == "mean_stateaz"	
	
	replace rownames = "Mean Age" if rownames == "mean_age"
	replace rownames = "Female" if rownames == "mean_female"
	replace rownames = "Non-U.S.-Born" if rownames == "mean_is_foreign"	
	replace rownames = "Married" if rownames == "mean_married"
	
	replace rownames = "Number of Unique Individuals" if rownames == "n"
	replace rownames = "" if rownames == "incbin"
	drop if rownames == "sample"
		
	* Table head 
	order panel rownames _var6 _var5 _var4 _var3 _var2 _var1
			
	loc i = 1
	loc tabspec "ll"
	ds panel depno, not
	foreach var of varl `r(varlist)' {
		ren `var' v`i'
		if `i' < `:word count `r(varlist)'' {
			loc tabspec "`tabspec'r"
			loc ++i
		}			
	}
	loc ++i
	
	* Add descriptions as in 
	split v1, p(";")
	replace v11 = "" if v12 != "Mean" & v12 != ""
			
	texdoc init "${tables}/desc04-top_earners_age40to55.tex", replace
	
	tex \begin{table}[h!] 
	tex \caption{\bf Characteristics of Top Earning Physicians \label{tab:topXpercent}} 
	tex \begin{center}
	tex \resizebox{1\textwidth}{!}{
	tex \begin{tabular}{`tabspec'} 
	tex \midrule \midrule 
	tex & & \multicolumn{6}{c}{\textbf{Top X\% of Physicians by Income}} \\  \cmidrule(lr){3-8} \addlinespace[0.5ex]
	tex & & \multicolumn{1}{c}{1\%} & \multicolumn{1}{c}{5\%} & \multicolumn{1}{c}{10\%} & \multicolumn{1}{c}{25\%} & \multicolumn{1}{c}{50\%} & \multicolumn{1}{c}{All} \\ \midrule \\

	* Descriptive statistics 
	forvalues k = 1/`=_N' {			

		if panel[`k'] != panel[`k'-1] & panel[`k'] != "" tex \\ \multicolumn{`i'}{l}{\textbf{`=panel[`k']'}}  \\ \cmidrule(lr){1-1} 
		
		tex `=v11[`k']' & `=v12[`k']' & `=v2[`k']'  & `=v3[`k']'  & `=v4[`k']' & `=v5[`k']' & `=v6[`k']' & `=v7[`k']' \\ 
		
		if inlist(`k',4,5,6,8,15,24)					tex \addlinespace[2ex] /* Sensitive to order of dependent variables!*/
										
	}
			
	tex \hline \hline
	tex \end{tabular}
	tex }
	tex \end{center}
	tex \end{table}
	
	texdoc close

****************************************************************************************
*** Figures E.1/E.2: Distribution of Physician Income/Physician Adjusted Gross Income

	import delimited using "${mypath}/intermediate_csv/desc02-kden_earnings1to99_2017.csv", clear asdoub
	replace varlab = "Log Annual Income" if varlab == "Log-Earnings"
	replace varlab = "Log Adjusted Gross Income" if varlab == "Log AGI (1000 $)"
	
	* Produce histogram 
	foreach y in logptotinc logaginc {
	preserve
		keep if var == "`y'"
		
		gsort xaxis
		loc barwidth = `=xaxis[2]' - `=xaxis[1]' 
		loc mean = `=mean[1]'
		loc tmean = string(round(`=mean[1]', .01),"%04.2f")
		loc sd = string(round(`=sd[1]',.01),"%04.2f")
		loc p1 = `=p1[1]'
		loc p99 = `=p99[1]'
		loc tp1 = string(round(`=p1[1]',.01),"%04.2f")
		loc tp99 = string(round(`=p99[1]',.01),"%04.2f")
		
		qui su frac 
		loc text_height = `r(max)' + `r(max)'/15
		
		levelsof varlab if var == "`y'", loc(label) clean 
		
		#d ;
		tw  (bar frac xaxis, bcolor("8 48 107%75") lw(none) barwidth(`barwidth') bstyle(histogram)),  
			ylab(, nogrid) ytitle("") yscale(r(0 `=`text_height' + (`text_height'/20)'))
			xlab(, nogrid)  
			xtitle("`label'") xline(`mean', lc(g8) noextend) 
			text(`text_height' `mean' " Mean = `tmean'" " SD = `sd'", placement(right) color(g8) size(medsmall) justification(left) box bcolor(none))
			xline(`mean', lc(g8) noextend) 
			text(`text_height' `p1' " " "1% Cutoff = `tp1'", placement(se) color(g8) size(medsmall) justification(right) box bcolor(none))				
			xline(`p1', lc(g8) noextend) 
			text(`text_height' `p99'  " " "99% Cutoff = `tp99'", placement(sw) color(g8) size(medsmall) justification(left) box bcolor(none))
			xline(`p99', lc(g8) noextend)  				
			title("Density", size(medium) placement(9) span justification(left) col(black))
			;
		#d cr
		
		graph export "${individ_figs}/desc02-kden_`y'1to99_2017.eps", replace
		
	restore
	}

*******************************************************
*** Figure E.3: Time Series of Earnings and Firm Size

	import delimited using "${mypath}/intermediate_csv/desc07-time_series_earnings.csv", clear asdoub
		
	* Earnings
	foreach var in ptotinc top1 {
		
		* use locals to assign coordinates to graph elements
		if "`var'" == "ptotinc"				loc ytitle `""Annual Income" "($1,000)""'
		if "`var'" == "top1"				loc ytitle "Share"
		if "`var'" == "ptotinc"				loc ylab "250(25)350"
		if "`var'" == "top1"				loc ylab "0.15(0.05)0.3"
		su adj_`var' if year == 2015
			loc adj_placem = `r(mean)'*1.05
		su notadj_`var' if year == 2010
			loc unadj_placem = `r(mean)'*0.98
		
		#d ;
		tw (connected adj_`var' year, lcolor(black) mcolor(black))
		(connected notadj_`var' year, lcolor(eltblue) mcolor(eltblue) lpattern(dash)),
		xtitle("Year", size(medlarge))
		ytitle("") 
		ymtick(##2)
		ylabel(`ylab',labsize(medlarge) nogrid) 
		xlabel(2005(2)2017, labsize(medlarge) nogrid)
		text(`adj_placem' 2015 "{bf:Regression adjusted}", color(black) placement(n))
		text(`unadj_placem' 2010 "{bf:Unadjusted}", color(eltblue) placement(s))
		legend(off)
		title(`ytitle', size(medlarge) placement(9) span justification(left) col(black))
		;
		#d cr
			
		graph export "${individ_figs}/desc07-time_series_`var'.eps", replace
	}			
	
	* Share Single practice
	import delimited using "${mypath}/intermediate_csv/desc08-time_series_einsize.csv", clear asdoub
		
	* use locals to assign coordinates to graph elements
	su adj_einsize1 if year == 2015
		loc adj_placem = `r(mean)'*1.05 
	su notadj_einsize1 if year == 2010
		loc unadj_placem = `r(mean)'*0.95 
			
	#d ;
	tw (connected adj_einsize1 year, lcolor(black) mcolor(black))
		(connected notadj_einsize1 year, lcolor(eltblue) mcolor(eltblue) lpattern(dash)),
		xtitle("Year", size(medlarge))
		ytitle("") 
		ymtick(##2)
		ylabel(,labsize(medlarge) nogrid) 
		xlabel(2005(2)2017, labsize(medlarge) nogrid)
		text(`adj_placem' 2015 "{bf:Regression adjusted}", color(black) placement(n))
		text(`unadj_placem' 2010 "{bf:Unadjusted}", color(eltblue) placement(s))
		legend(off)
		title("Share" , size(medlarge) placement(9) span justification(left) col(black))
	;
	#d cr

	
	graph export "${individ_figs}/desc08-time_series_einsize.eps", replace	
	
	* Share Einsize missing 
	* use locals to assign coordinates to graph elements
	su adj_einsize_missing if year == 2015
		loc adj_placem = `r(mean)'*1.04 
	su notadj_einsize_missing if year == 2010 
		loc unadj_placem = `r(mean)'*0.97
			
	#d ;
	tw (connected adj_einsize_missing year, lcolor(black) mcolor(black))
		(connected notadj_einsize_missing year, lcolor(eltblue) mcolor(eltblue) lpattern(dash)),
		xtitle("Year", size(medlarge))
		ytitle("") 
		ymtick(##2)
		ylabel(,labsize(medlarge) nogrid) 
		xlabel(2005(2)2017, labsize(medlarge) nogrid)
		text(`adj_placem' 2015 "{bf:Regression adjusted}", color(black) placement(n))
		text(`unadj_placem' 2010 "{bf:Unadjusted}", color(eltblue) placement(ne))
		legend(off)
		title("Share" , size(medlarge) placement(9) span justification(left) col(black)) ;
	#d cr
	
			
	graph export "${individ_figs}/desc08-time_series_einsize_missing.eps", replace	

**************************************************************
*** Figure E.4: Physician's Labor supply over the Lifecyle

	import delimited "${mypath}/intermediate_csv/desc03-lifecycle_earnings_binscatter.csv", clear asdoub
	destring received_1099ssa, replace i(";")
	
	* Weekly hours, Schedule C filing, Retirement 
	loc agerange = "all ages"
	loc xlabs  "20(10)70"
	loc restriction = "All ages; year=2017"
	
	foreach var of varl wkh_imputed schedule_c_flag received_1099ssa {
		if "`var'" == "wkh_imputed"			loc title `""Weekly Work" "Hours (ACS)""'
		if "`var'" == "schedule_c_flag"				loc title "Share"
		if "`var'" == "received_1099ssa" {
			loc title  `""Share" "Retired""'
			loc agerange  "age > 54"
			loc xlabs  "54(2)70"
			loc restriction "age >54; year=2017"
		}	
			
		#d ;
		tw 	(connected `var' agebin if restriction == "`restriction'", lcolor(black) mcolor(black)),  
			xlabel(`xlabs',labsize(medlarge) nogrid) 
			ylabel(,labsize(medlarge) nogrid) 
			xtitle("Age", size(medlarge))
			ytitle("") 
			xmtick(##2) 
			ymtick(##2)
			title(`title', size(medlarge) placement(9) span justification(left) col(black))
			;
		#d cr
		graph export "${individ_figs}/desc03-lifecycle_`var'_2017.eps", replace
	}

**********************************************************************
*** Figure E.5: Medical School Rank vs. Speciality Income

	import delimited using "${mypath}/intermediate_csv/choice_correlating_top5.csv", clear asdoub 
	replace spec_category_desc = "OB-GYN" if spec_category_desc == "Ob/Gyn"

	#d ;
	tw (scatter top5_school_share mean_hourly_income, 
		mlabel(spec_category_desc) mcolor("8 48 107") msymbol(o) mlabsize(medsmall)),
		ytitle("") 
		title("Share of Specialty" "from Top-5" "Medical Schools", size(medlarge) placement(9) span justification(left) col(black))
		xtitle("Hourly Income ($)",size(medlarge)) 
		xlabel(, labsize(medlarge)) 
		ylabel(, labsize(medlarge))
		graphregion(color(white) m(r+2))
	;
	#d cr	
	graph export "${individ_figs}/baseline-06-individual_level_top_school_share_and_income_by_specialty.eps", replace

*****************************************************************************
*** Table E.1: Definition of Speciality Categories

	texdoc init "${tables}/tabl_spec_category_definition.tex", replace

	/*tex
	\begin{table}  \centering
	\caption{\bf Definition of Specialty Categories}
	\label{app_tab:specialty_categories_definition}
	\begin{tabular}{rrp{0.5\textwidth}} \toprule
	Specialty Category&Medicare Specialty Code& Medicare Specialty Description\\ 
		   \cmidrule(lr){1-1}\cmidrule(lr){2-2}\cmidrule(lr){3-3} 
		   \addlinespace
	 \textbf{1} & \multicolumn{2}{l}{\textbf{Primary Care}}\\ 
	 \cmidrule(lr){2-3}
	 &1&General Practice\\ 
	 &8&Family Practice\\ 
	 &11&Internal Medicine\\ 
	 &17&Hospice and Palliative Care\\ 
	 &23&Sports Medicine\\ 
	 &26&Psychiatry\\ 
	 &37&Pediatric Medicine\\ 
	 &38&Geriatric Medicine\\ 
	 &72&Pain Management\\ 
	 &79&Addiction Medicine\\ 
	 &84&Preventive Medicine\\ 
	 &C0&Sleep Medicine\\  \addlinespace
	\textbf{2} & \multicolumn{2}{l}{\textbf{Medicine Subspecialty}}\\
	 \cmidrule(lr){2-3}
	 &3&Allergy/Immunology\\ 
	 &6&Cardiovascular Disease (Cardiology)\\ 
	 &10&Gastroenterology\\ 
	 &21&Clinical Cardiac Electrophysiology\\ 
	 &29&Pulmonary Disease\\ 
	 &39&Nephrology\\ 
	 &44&Infectious Disease\\ 
	 &46&Endocrinology\\ 
	 &66&Rheumatology\\ 
	 &81&Critical Care (Intensivists)\\ 
	 &82&Hematology\\ 
	 &83&Hematology-Oncology\\ 
	 &90&Medical Oncology\\ 
	 &91&Surgical Oncology\\ 
	 &C3&Interventional Cardiology\\ 
	 &C7&Advanced Heart Failure and Transplant Cardiology\\ 
	 &Undefined&Genetics\\ 
	 &Undefined&Hypertension Specialist\\ 
	 &Undefined&Phlebology\\  \addlinespace
	\textbf{3} & \multicolumn{2}{l}{\textbf{Obstetrics \& Gynecology}}\\
	 \cmidrule(lr){2-3}
	 &16&Obstetrics \& Gynecology\\ 
	 &98&Gynecological Oncology\\ 
	 (\ldots & \ldots & continued on next page\ldots)
		 \end{tabular}
	\end{table}

	\begin{table}
		\centering
		\begin{tabular}{rrp{0.5\textwidth}} 
	Specialty Category&Medicare Specialty Code&Medicare Specialty Description\\ 
		 \cmidrule(lr){1-1}       \cmidrule(lr){2-2}  \cmidrule(lr){3-3} 
	(\ldots & \ldots & \ldots continued from previous page) \\[3pt]
	\textbf{4} & \multicolumn{2}{l}{\textbf{Surgery}}\\
	 \cmidrule(lr){2-3}
	 &2&General Surgery\\ 
	 &14&Neurosurgery\\ 
	 &20&Orthopedic Surgery\\ 
	 &24&Plastic and Reconstructive Surgery\\ 
	 &28&Colorectal Surgery (Proctology)\\ 
	 &33&Thoracic Surgery\\ 
	 &40&Hand Surgery\\ 
	 &76&Peripheral Vascular Disease\\ 
	 &78&Cardiac Surgery\\ 
	 &85&Maxillofacial Surgery\\ \addlinespace
	\textbf{5}& \multicolumn{2}{l}{\textbf{Procedural Specialties}}\\
	 \cmidrule(lr){2-3}
	 &4&Otolaryngology\\ 
	 &7&Dermatology\\ 
	 &18&Ophthalmology\\ 
	 &34&Urology\\ \addlinespace
	\textbf{6} & \multicolumn{2}{l}{\textbf{Hospital-Based}}\\
	 \cmidrule(lr){2-3}
	 &22&Pathology\\ 
	 &25&Physical Medicine and Rehabilitation\\ 
	 &93&Emergency Medicine\\ 
	 &C6&Hospitalist\\  
	 &Undefined&Pharmacology, Back Office\\ \addlinespace
	\textbf{7} & \multicolumn{2}{l}{\textbf{Anesthesiology}}\\
	 \cmidrule(lr){2-3}
	 &5&Anesthesiology\\ 
	 &9&Interventional Pain Management\\  \addlinespace
	\textbf{8} & \multicolumn{2}{l}{\textbf{Radiology}}\\
	 \cmidrule(lr){2-3}
	 &30&Diagnostic Radiology\\ 
	 &36&Nuclear Medicine\\ 
	 &92&Radiation Oncology\\ 
	 &94&Interventional Radiology\\ \addlinespace
	\textbf{9} & \multicolumn{2}{l}{\textbf{Neurology}}\\ 
	 \cmidrule(lr){2-3}
	&12&Osteopathic Manipulative Medicine\\ 
	&13&Neurology\\ 
	&86&Neuropsychiatry\\ 
	&Undefined&Electrodiagnostic Medicine\\ 
	 \bottomrule
	 	\end{tabular}
	\end{table}
	tex*/

**********************************************************************
*** Table E.2: Descriptive Variation in Earnings

	import delimited using "${mypath}/intermediate_csv/desc02-earnings_variance.csv", asdoub clear varnames(1) delim(tab)
	drop if _n==1 | regexm(v2, "b/se") | regexm(v2, "logptotinc") | strpos(v1, "Regressors") | strpos(v1, "N_")
	drop if inlist(_n, 13, 14) // drop rows with regression constant
	
	* Remove "=" and quotes from rows 
	foreach v of varlist *{
		replace `v' = substr(`v', 3, strlen(`v')-3)
	}
	
	* formatting 
	replace v1 = "$ R^2 $" if v1 == "R2"
	replace v1 = proper(v1)
	replace v1 = "Business Inc. $>$ \USD 25K"  if v1 == "With25Kbizinc"
	replace v1 = "Graduated from Top-5 Medical School"  if v1 == "Top5_School"
	replace v1 = "Non-U.S.-Born"  if strpos(v1, "Alien")
	replace v1 = "White" if v1 == "white"  
	
	forv k=1/`=_N'{
		loc j =`k'-1
		if "`=v1[`k']'" == ""{
			replace v1 = "SE `=v1[`j']'" if missing(v1) & _n==`k' // v1 now unique id 
		}
	}
	
	ds v1, not
	foreach var of varl `r(varlist)' {
		destring `var', gen(n`var')
		replace `var' = string(n`var',"%04.2f") if v1 != "" 
		replace `var' = "(" + string(n`var',"%05.3f") + ")" if strpos(v1, "SE") 
		replace `var' = string(n`var',"%12.0fc") if v1 == "N"
		replace `var' = "" if `var' == "(.)"
		replace `var' = "" if `var' == "."
	}
	drop n*
	replace v1="" if strpos(v1, "SE")
	
	* table	head	
	loc i = 1
	loc tabspec "l"
	loc tableno " "
		
	ds v1, not
	foreach var of varl `r(varlist)' {
		loc tabspec "`tabspec'c"
		loc tableno "`tableno' & (`i')"		
		loc ++i 				
	}
	
	texdoc init "${tables}/desc02-earnings_variance.tex", replace
	
	tex \begin{table}[h!] 
	tex \caption{\bf Descriptive Variation in Earnings \label{tab:variationearnings}} 
	tex \begin{center}
	tex \resizebox{1\textwidth}{!}{
	tex \begin{tabular}{`tabspec'} 
	tex \midrule \midrule 
	tex & \multicolumn{11}{c}{Dependent Variable: Log Individual Total Income} \\  \cmidrule(lr){2-`i'} 
	tex `tableno' \\ \midrule \addlinespace[1ex]  
		
	* Regression results		
	forvalues k = 1/`=_N' {	
				
		tex	`=v1[`k']' &  `=v2[`k']'  & `=v3[`k']'  & `=v4[`k']' & `=v5[`k']' & `=v6[`k']' & `=v7[`k']' & `=v8[`k']' & `=v9[`k']'  & `=v10[`k']' & `=v11[`k']'  & `=v12[`k']'  \\ 
		
		if "`=v1[`k']'" == "N"  {
			tex \addlinespace[1ex]
			tex Age Fixed Effects 			     & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes \\
			tex Medicare Specialty Fixed Effects & No  & No  & Yes & No  & No  & No  & No  & Yes & Yes & Yes & Yes \\
			tex Commuting Zone Fixed Effects.    & No  & No  & No  & Yes & No  & No  & No  & No  & Yes & Yes & Yes \\
			tex Firm Size Fixed Effects 	     & No  & No  & No  & No  & Yes & No  & No  & No  & Yes & Yes & Yes  \\ 
			tex Birth State Fixed Effects        & No  & Yes & No  & No  & No  & No  & No  & No  & No  & Yes & Yes  \\ \addlinespace[2ex]
		}
		
	}

	tex \hline \hline
	tex \end{tabular}
	tex }
	tex \end{center}
	tex \end{table}
	
	texdoc close	

**************************************************************
*** Table E.3: Comparison of ACS and Tax Data

	import delimited using "${mypath}/intermediate_csv/desc06-tax_acs_comparison.csv", asdoub clear

	g table = 1 if _n < 9
	replace table = 2 if inlist(measure,"totw2comp","wag","pftloss","sem_total") & inlist(condition,"year_survey==2017 & physician_acs==1","year_survey==2017 & physician_acs==1 & income_acs>0","year_survey==2017 & physician_acs==1 & ptotinc>0") & table == .
	replace table = 3 if inlist(measure,"pftloss","totw2comp","wag","sem_total") & inlist(condition,"year_survey==2017 & physician_acs==1 & wag>0","year_survey==2017 & physician_acs==1 & totw2comp>0","year_survey==2017 & physician_acs==1 & pftloss>0","year_survey==2017 & physician_acs==1 & sem_total>0") & table == .

	tostring n, format("%12.0fc") replace force
	replace n = "$<$15" if n == "."
	tostring avg_measure, format("%15.0fc") replace force
	replace avg_measure = "\\\$" + avg_measure
	replace avg_measure = "N/A" if _n == 2 & measure == "income_acs"

	preserve 
	keep if table == 1

	texdoc init "${tables}/desc06-tax_acs_comparison.tex", replace

	tex \begin{table}[h!] 
	tex \caption{\bf Comparison of Tax and ACS Data  \label{app_tab:tax_vs_acs_averages}} 
	tex \begin{center}
	tex \resizebox{0.8\textwidth}{!}{
	tex \begin{tabular}{lccc}
	tex \midrule \midrule & & \multicolumn{2}{c}{Mean Individual Total Income} \\ \cmidrule(lr){3-4}  
	tex Sample & N & Tax Data & ACS Data \\ \midrule 
	
	tex 2017 Tax Sample & `=n[1]' & `=avg_measure[1]' & `=avg_measure[`=2']' \\ \addlinespace[1ex]
	tex 2017 Tax Sample $\cap$ 2017 ACS & `=n[3]' & `=avg_measure[3]' & `=avg_measure[`=4']' \\ \addlinespace[1ex]
	tex 2017 Tax Sample $\cap$ 2017 ACS & \multirow{3}{*}{`=n[5]'} & \multirow{3}{*}{`=avg_measure[5]'} & \multirow{3}{*}{`=avg_measure[`=6']'} \\ 
	tex $\cap$ report being a \\ physician in 2017 ACS \\ \addlinespace[1ex]
	tex 2017 Tax Sample $\cap$ 2017 ACS & \multirow{3}{*}{`=n[7]'} & \multirow{3}{*}{`=avg_measure[7]'} & \multirow{3}{*}{`=avg_measure[`=8']'} \\
	tex $\cap$ do not report being a \\ physician in 2017 ACS \\ \addlinespace[1ex]

	tex \hline \hline
	tex \end{tabular}
	tex }
	tex \end{center}
	tex \end{table}
	
	texdoc close
	
	restore

***************************************************************
*** Table E.4: Comparison of Tax and ACS Data by Income Type

	preserve 
	keep if table != 1
	sort table measure
	
	texdoc init "${tables}/desc06-self_employment.tex", replace

	tex \begin{table}[h!] 
	tex \caption{\bf Comparison of Tax and ACS Data by Income Type \label{app_tab:tax_vs_acs_docs_wage_vs_se}} 
	tex \begin{center}
	tex \resizebox{0.95\textwidth}{!}{
	tex \begin{tabular}{lcccccc} \midrule \midrule 
	tex & \multicolumn{2}{c}{} 								& \multicolumn{4}{c}{Business Income} \\  \cmidrule(lr){4-7}
	tex & \multicolumn{2}{c}{\$ \textnormal{Wage~} \vert \textnormal{~Wage} > 0 \$} & \multicolumn{2}{c}{Unconditional} & \multicolumn{2}{c}{\$ \vert \textnormal{~Business Inc.} > 0 \$} \\
	tex \cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
	tex & (1) & (2) & (3) & (4) & (5) & (6) \\ 
	tex & Tax & ACS & Tax & ACS & Tax & ACS \\ \midrule

	tex Mean Income & `=avg_measure[7]' & `=avg_measure[8]' & `=avg_measure[1]' & `=avg_measure[2]' & `=avg_measure[5]' & `=avg_measure[6]' \\ \addlinespace[1ex]
	tex N  			& `=n[7]'           & `=n[8]'           & `=n[1]'           & `=n[2]' 			& `=n[5]'           & `=n[6]' \\

	tex \hline \hline
	tex \end{tabular}
	tex }
	tex \end{center}
	tex \end{table}
	
	texdoc close
	
	restore

***********************************************************
*** Table E.5: Summary Statistics by Specialty Category

	import delimited using "${mypath}/intermediate_csv/desc05-specialty_stats.csv", asdoub clear varnames(1)
	
	gen yr = substr(string(year), 3, 2)
	replace spec_category_desc = "OB-GYN" if spec_category_desc == "Ob/Gyn"
	foreach v of varl mean median{
		tostring `v', format("%15.2fc") replace force
		replace `v' = subinstr(`v',".00","",.) if strlen(`v') > 4
	}
	
	keep depvar mean median spec_category_desc yr
	ren (mean median) (av md)
	replace depvar = "_wg" if depvar == "totw2comp"
	replace depvar = "_inc" if depvar == "ptotinc"
	replace depvar = "_adj" if depvar == "aginc"
	replace depvar = "_t1" if depvar == "top1"
	reshape wide av md, i(spec_category_desc yr) j(depvar) string
	reshape wide av* md*, i(spec_category_desc) j(yr) string
	
	#d ; 
	local toplab  " & & \multicolumn{2}{c}{\shortstacking{Individual Total \\ Income (2017~\USD)}} & 
					    \multicolumn{2}{c}{\shortstacking{Wage Income \\ (2017~\USD)}}             & 
					    \multicolumn{2}{c}{\shortstacking{Adjusted Gross \\ Income (2017~\USD)}}   & 
					    \multicolumn{2}{c}{\shortstacking{Share in \\ Top 1\%}} 
					"; 
	#d cr
	local tabspec "ll"
	local tableno "&"
	forv i=1/8{ 
		local tabspec "`tabspec'c"
		local tableno "`tableno' & (`i')"
	}

	texdoc init "${tables}/desc05-specialty_stats_2017.tex", replace
	
	tex \begin{table}[h!] 
	tex \caption{\bf Summary Statistics by Specialty Category \label{tab:sumstatsbyspec}} 
	tex \begin{center}
	tex \resizebox{0.8\textwidth}{!}{
	tex \begin{tabular}{`tabspec'} 
	tex \midrule \midrule  
	tex `toplab' \\ \cmidrule(lr){3-4} \cmidrule(lr){5-6} \cmidrule(lr){7-8} \cmidrule(lr){9-10} 
	tex `tableno' \\
	tex & & \multicolumn{1}{c}{2005} & \multicolumn{1}{c}{2017} & \multicolumn{1}{c}{2005} & \multicolumn{1}{c}{2017} & \multicolumn{1}{c}{2005} & \multicolumn{1}{c}{2017} & \multicolumn{1}{c}{2005} & \multicolumn{1}{c}{2017} \\ \midrule \\ 
	
	* Descriptive statistics 
	levelsof spec_category_desc
	foreach spec in `r(levels)'{
		preserve 
			keep if spec_category_desc == "`spec'"
			replace md_t105 = "-"
			replace md_t117 = "-"
			ren spec_category_desc s
			tex \textbf{`=s'} & Mean &`=av_inc05'&`=av_inc17'&`=av_wg05'&`=av_wg17'&`=av_adj05'&`=av_adj17'&`=av_t105'&`=av_t117' \\ 
			tex             & Median &`=md_inc05'&`=md_inc17'&`=md_wg05'&`=md_wg17'&`=md_adj05'&`=md_adj17'&`=md_t105'&`=md_t117' \\
			tex \addlinespace[1ex]	
		restore 
	} 
	tex \hline \hline
	tex \end{tabular}
	tex }
	tex \end{center}
	tex \end{table}
	
	texdoc close	
